# -*- coding: UTF-8 -*-
import client.tWeibo
import urllib.parse
import datetime
import sqlite3
import random
import time
import math
import json
import sys
import re
# Collect Weibo posts from the "Square" (广场) channel pages
# and save them to SQLite.
def stripLink(html):
	"""Remove anchor tags from *html*, keeping only the text they wrap.

	Every ``<a ...>text</a>`` element is replaced by ``text``. Tag names are
	matched case-insensitively and the wrapped text may span several lines.

	Args:
		html: Markup string to clean.

	Returns:
		The input with each anchor element replaced by its inner content.
	"""
	# The flags must be passed by keyword: the fourth positional parameter of
	# re.sub is ``count``, so passing them positionally would cap the number
	# of replacements and leave IGNORECASE/DOTALL switched off.
	return re.sub(r"<a [^>]+>(?P<content>.+?)</a>", r"\g<content>", html, flags=re.I | re.S)
def replaceQFace(html):
	"""Replace ``<img>`` tags that carry a ``title`` attribute with text markers.

	Two tag shapes are handled, in this order:

	* ``<img ... title='T'>``        becomes ``/T `` (slash, title, space)
	* ``<img ... title='T' k='K'>``  becomes ``/KT `` (slash, k, title, space)

	Args:
		html: Markup string to convert.

	Returns:
		The input with every matching image tag replaced by its marker.
	"""
	# ``[^>]+?`` keeps each match inside a single tag; a plain ``.+?`` could
	# run on into a later <img> and swallow the text in between.
	# flags is passed by keyword because the fourth positional parameter of
	# re.sub is ``count``, not ``flags``.
	html = re.sub(r"<img [^>]+?title='(?P<title>[^']+)'>", r'/\g<title> ', html, flags=re.I)
	return re.sub(r"<img [^>]+?title='(?P<title>[^']+)' k='(?P<k>[^']+)'>", r'/\g<k>\g<title> ', html, flags=re.I)
	
# Open (or create) the local SQLite database that receives the collected posts.
connect = sqlite3.connect('weibo.db')
cursor = connect.cursor()
# One-time schema setup, kept commented out for reference. The INSERT issued
# by the collection loop needs the weibo_content table to exist already.
# cursor.execute("CREATE TABLE IF NOT EXISTS weibo_content (id INTEGER PRIMARY KEY, channel TEXT, weibo_id TEXT UNIQUE, content TEXT, name TEXT, nick TEXT, timestamp INTEGER, pic TEXT, gathertime TEXT DEFAULT  CURRENT_TIMESTAMP )")
# connect.commit()

# Account credentials; the literals are placeholders ('QQ号' = QQ number,
# '密码' = password). These assignments must start at column 0: indenting
# them at module level raises IndentationError.
uin = 'QQ号'
passwd = '密码'
wb = client.tWeibo.tWeibo(uin, passwd)
wb.login()
# Channels to collect, as (channel id, display name) pairs.
# Channel left out of the collection: ('1834', '《大家》')
channelIds = [
	('1254', '热门'),
	('1248', '热门'),
	('40', '搞笑'),
	('838', '动漫'),
	('19', '星座'),
	('48', '兴趣'),
	('34', '读书'),
	('883', '科技'),
	('1', '时事'),
	('47', '资讯'),
	('1331', '蔡奇'),
	('1688', '路边社'),
	('882', '杂谈'),
	('1732', '媒体'),
	('889', '人生语录'),
	('1646', '情感'),
	('1719', '语录'),
	('1829', '全家福'),
	('38', '旅行'),
	('31', '摄影'),
	('1718', '美图'),
	('925', '红酒'),
	('1141', '美食家'),
	('35', '美食'),
	('37', '美容'),
	('36', '服饰搭配'),
	('49', '时尚'),
	('844', '贴图'),
	('843', '真人秀'),
	('1645', '女神'),
	('43', '美女'),
]
def _nowMillis():
	"""Return the current time as whole milliseconds since the Unix epoch."""
	return math.floor(datetime.datetime.now().timestamp() * 1000)


# Values that do not change between channels.
# INSERT OR IGNORE makes SQLite skip a row that would violate a constraint
# (the commented-out schema near the top of the file declares weibo_id UNIQUE).
sqlInsert = "INSERT OR IGNORE INTO weibo_content(channel, weibo_id, content, name, nick, timestamp, pic) VALUES(?, ?, ?, ?, ?, ?, ?)"
apiURL = 'http://c.t.qq.com/asyn/selectedAutoUpdate.php?'
# Shared by the first-page and follow-up requests so the two cannot drift apart.
apiHost = 'http://api.t.qq.com'

try:
	# For every channel: fetch page after page, append each raw response to a
	# per-channel dump file, and insert the posts into SQLite.
	for (channelId, channelName) in channelIds:
		header = {
			'Referer':'http://c.t.qq.com/i/' + channelId,
			'User-Agent' : 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.52 Safari/537.17',
		}
		page = 1
		# Pagination cursor: id and timestamp of the last post seen. Reset per
		# channel so a value from the previous channel is never reused.
		tid = None
		timestamp = None
		# First-page request (sent with version 4 and without cursor parameters).
		furl = apiURL + urllib.parse.urlencode({
				'cid' : channelId,
				'n' : '30',
				'pgv_ref' : 'web.c.page.nav.tree.level1',
				'turn' : '1',
				'version' : '4',
				'personalOrder' : '0',
				'apiType' : '7',
				'apiHost' : apiHost,
				'_r' : _nowMillis(),
			})
		rs = wb.get(furl, header).data.decode('utf-8')
		# NOTE(review): the path uses Windows separators and open() will fail
		# if the target directory does not exist.
		# The with-block closes the file on every exit path, including
		# sys.exit() below.
		with open('data\\channel\\%s.json' % channelId, 'w',  encoding='utf-8') as fp:
			while True:
				# Keep the raw response, one page per line.
				fp.write(rs.strip() + '\n')
				print("广场 > %s -- %d" % (channelName, page))
				try:
					result = json.loads(rs)
				except ValueError as e:
					# Unparseable response: show it and stop the whole run.
					print(rs)
					print(e)
					sys.exit()
				talks = result['info']['talk']
				if not talks:
					# An empty page marks the end of this channel.
					break
				for talk in talks:
					try:
						# Remember the cursor even for posts that are skipped.
						tid = talk['id']
						timestamp = talk['timestamp']
						if talk['type'] != 1 or talk['content'] is None:
							continue
						# presumably 'image' can be null like 'content';
						# treat that as "no pictures" -- TODO confirm
						pics = ','.join(talk['image'] or [])
						cursor.execute(sqlInsert, (channelName, talk['id'], stripLink(replaceQFace(talk['content'])), talk['name'], talk['nick'], talk['timestamp'], pics,))
					except KeyError as e:
						# A post missing an expected field is reported and skipped.
						print(talk)
						print(e)
				connect.commit()
				if tid is None or timestamp is None:
					# No usable cursor was found, so the next page cannot be requested.
					break
				page += 1
				# Follow-up request: version 2 plus the page number and cursor.
				furl = apiURL + urllib.parse.urlencode({
						'cid' : channelId,
						'n' : '30',
						'pgv_ref' : 'web.c.page.nav.tree.level1',
						'turn' : '1',
						'version' : '2',
						'personalOrder' : '0',
						'r' : _nowMillis(),
						'p' : page,
						'id' : tid,
						'time' : timestamp,
						'apiType' : '7',
						'apiHost' : apiHost,
						'_r' : _nowMillis(),
					})
				rs = wb.get(furl, header).data.decode('utf-8')
	connect.commit()
finally:
	# Release the database handles whether the run finished or was aborted.
	cursor.close()
	connect.close()
